Author

Joe Boktor

Published

July 22, 2023

Code
library(tidyverse)
library(magrittr)
library(reticulate)
library(glue)
library(bio3d)
library(protr)
library(seqinr)
library(future)
library(batchtools)
library(future.batchtools)
library(fs)
library(tictoc)
library(listenv)
library(progress)
library(strex)
library(data.table)
library(kableExtra)
# Plotting functions
library(ggpackets)
library(ggpointdensity)
library(ggside)
library(patchwork)
library(ggridges)
library(scales)
library(plotly)
library(ggsci)
library(viridis)
library(ggforce)
library(seriation)
# protein structure analysis
library(bio3d)
library(r3dmol)

# ---- Project paths ----
# Absolute, cluster-specific locations; edit these when running elsewhere.
tmpdir <- "/central/scratch/jbok/tmp"
homedir <- "/central/groups/MazmanianLab/joeB"
# The working directory is a folder nested inside a folder of the same name.
# Unnamed glue() arguments are concatenated, so this is a single path.
wkdir <- glue(
  "{homedir}/",
  "Microbiota-Immunomodulation/",
  "Microbiota-Immunomodulation"
)
src_dir <- glue("{wkdir}/notebooks")

# ---- Project helper scripts ----
# Sourced in this order: general helpers first, then the PDB wrangling helpers.
# source() evaluates in the global environment by default, so everything the
# scripts define is available to the rest of the notebook.
invisible(lapply(
  c("helpers_general.R", "helpers_pdb-wrangling.R"),
  function(helper_script) {
    source(glue("{src_dir}/R-scripts/{helper_script}"))
  }
))

protein_catalogs <- glue("{homedir}/Downloads/protein_catalogs")

# ---- future settings ----
# Raise the size limit on globals exported to future workers to
# 1500 MiB (1500 * 1024^2 = 1572864000 bytes).
options(future.globals.maxSize = 1500 * 1024^2)

Load the list of model alignment summaries.

Code
# Named character vector of paths to the files found in the
# foldseek `top_pdb_models` results directory.
# Each element is named by its file's basename with the "2023-08-07_" date
# prefix and the "_.rds" suffix removed (e.g. "C5_ENSG00000106804"), so a
# file can be looked up by that identifier.
pdb_mod_summaries <- list.files(
  glue("{wkdir}/data/interim/foldseek_results/top_pdb_models"),
  full.names = TRUE
) %>%
  purrr::set_names(
    # The dot is escaped so that only the literal "_.rds" suffix is removed;
    # an unescaped "." would match any character before "rds".
    str_remove_all(basename(.), "2023-08-07_|_\\.rds")
  )
Code
# Inspect selected entries of the C5 (ENSG00000106804) summary.
# The RDS file is read once and reused rather than being deserialised for
# every lookup. `[[ ]]` is used instead of `$` so that element names are
# matched exactly (no partial matching on lists).
# NOTE(review): the nesting appears to be
# target -> PDB model/chain -> search database -> taxonomic group -- confirm.
c5_id <- "C5_ENSG00000106804"
c5_models <- readRDS(pdb_mod_summaries[[c5_id]])[[c5_id]]

c5_models[["3PVM.pdb_A"]][["Alphafold UniProt50"]][["Bacteria"]]
c5_models[["5I5K.pdb_B"]][["ESMAtlas30"]][["Unknown"]]
c5_models[["AF-P01031-F1-model_v4.pdb"]][["ESMAtlas30"]][["Unknown"]]